package demo;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/**
 * Minimal Spark SQL demo: loads a JSON file into a {@code Dataset<Row>} and runs a
 * handful of DataFrame operations (projection, computed column, filter, grouping,
 * sorting), printing each result with {@code show()}.
 *
 * <p>The queries reference the columns {@code name} and {@code age}, so the input
 * JSON is expected to provide both.
 */
public class DataFrameApp {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_PEOPLE_JSON =
            "F:\\lzc\\SparkSQL\\sparklearning\\src\\main\\java\\demo\\people.json";

    /**
     * Runs the demo against a local Spark session ({@code local[2]}).
     *
     * @param args optional; {@code args[0]} is the path of the JSON file to read.
     *             When absent, {@link #DEFAULT_PEOPLE_JSON} is used.
     */
    public static void main(String[] args) {
        String jsonPath = (args != null && args.length > 0) ? args[0] : DEFAULT_PEOPLE_JSON;

        SparkSession spark = SparkSession.builder()
                .appName("DataFrameApp")
                .master("local[2]")
                .getOrCreate();
        try {
            Dataset<Row> people = spark.read().json(jsonPath);

            people.printSchema();
            people.show();

            // SELECT name FROM table
            people.select("name").show();

            // SELECT name, age + 10 AS age2 FROM table
            people.select(people.col("name"), people.col("age").plus(10).as("age2")).show();

            // SELECT * FROM table WHERE age > 20
            // Java Column comparisons: gt (>), lt (<), geq (>=), leq (<=),
            // equalTo (==), notEqual (!=).
            people.filter(people.col("age").gt(20)).show();

            // SELECT age, COUNT(*) FROM table GROUP BY age
            people.groupBy("age").count().show();

            // SELECT * FROM table ORDER BY name DESC, age ASC
            people.sort(people.col("name").desc(), people.col("age").asc()).show();
        } finally {
            // Always release the session, even when reading or an action above fails.
            spark.stop();
        }
    }
}
